This will be a quick summary for those who do not want to know the detailed implementation.
This Program solved a data input process that took up to 5 days to complete. It was highly tedious and required
regular intervention.
The program takes in three excel sheets Sheet1, Sheet2, Sheet3. These sheets contain copied data
directly from pdf files. It then churns out four sheets into Final_Format,
Data 1, Spare Data, Data 2. Final_Format contains general data, a sort of table of contents
overview. It is a cleanup of extra lines and unnecessary data in Sheet1. Data 1, Spare Data, and Data 2
contain data that is a more granular view of the data. This data is retreived via regular expression from Sheet2 and
Sheet3.
Sub GeneralReformat()
Dim GeneralSheet As Worksheet
Dim TitleSheet As Worksheets
Dim DataOneSheet as Worksheet
Dim DataTwoSheet As Worksheet
Dim FinalSheet As Worksheet
Dim SpareDataSheet As Worksheet
Dim NameCheck As Boolean
NameCheck = CheckWorkbookNames
If NameCheck <> True Then
Exit Sub
End If
Set GeneralSheet = Workbooks("Data Formatter.xlsm").Worksheets("Sheet2")
Set TitleSheet = Workbooks("Data Formatter.xlsm").Worksheets("Sheet3")
' Format the TitleSheet
TitleSheet.Activate
TitleSheet.Copy After:=TitleSheet
ActiveSheet.Name = "FormattedTitles"
' Create sheets of parts 2 and 3 of the sections
Sheets.Add(After:=Sheets("FormattedTitles")).Name = "Data 2 Sheet"
Sheets.Add(After:=Sheets("FormattedTitles")).Name = "Data 1 Sheet"
Set TitleSheet = Workbooks("Data Formatter.xlsm").Worksheets("FormattedTitles")
Set DataOneSheet = Workbooks("Data Formatter.xlsm").Worksheets("Data 1 Sheet")
Set DataTwoSheet = Workbooks("Data Formatter.xlsm").Worksheets("Data 2 Sheet")
' Insert the column titles for the data 1 and data 2 sheets
SetColumnsDataSheet DataOneSheet
SetColumnsDataSheet DataTwoSheet
' populate the data into the data 1 and data 2 sheets
PopulateDataSheet TitleSheet, DataOneSheet, DataTwoSheet, GeneralSheet
StyleDataSheet DataOneSheet
StyleDataSheet DataTwoSheet
' Format the General sheet
GeneralSheet.Activate
GeneralSheet.Copy After:=GeneralSheet
ActiveSheet.Name = "FormattedGeneral"
Set GeneralSheet = Workbooks("Data Formatter.xlsm").Worksheets("FormattedGeneral")
PopulateGeneral GeneralSheet
' Initial Formating to remove all extra rows and concatinate the values
Set FinalSheet = Workbooks("Data Formatter.xlsm").Sheets(1)
FinalSheet.Copy After:=FinalSheet
Set FinalSheet = Workbooks("Data Formatter.xlsm").Sheets(2)
FinalSheet.Name = "Final_Format"
FinalSheet.Activate
PopulateFinal FinalSheet
' With the first sheet input the Data for all GD's
Dim Bottom As Integer
FinalColumnLabels FinalSheet
FinalDataInput FinalSheet
Set Bottom = FinalSheet.Range("F65536").End(xlUp)
StyleFinal Bottom
' Move All sheets to final format workbook and add spare data sheet
Application.ScreenUpdating = False
Application.DisplayAlerts = False
Workbooks.Add.SaveAs Filename:="Final_Format"
FinalSheet.Move Before:=Workbooks("Final_Format.xlsx").Sheets(1)
DataOneSheet.Move After:=Workbooks("Final_Format.xlsx").Sheets(1)
DataTwoSheet.Move After:=Workbooks("Final_Format.xlsx").Sheets(2)
Workbooks("Final_Format.xlsx").Sheets(4).Delete
Workbooks("Final_Format.xlsx").Sheets.Add After:=Workbooks("Final_Format.xlsx").Sheets(2)
Workbooks("Final_Format.xlsx").Sheets(3).Name = "Spare Data"
Set SpareDataSheet = Workbooks("Final_Format.xlsx").Sheets(3)
SetColumnsDataSheet SpareDataSheet
StyleDataSheet SpareDataSheet
' Remove created sheets in Data formatter
Workbooks("Data Formatter.xlsm").Activate
Sheets(3).Delete
Sheets(4).Delete
Application.DisplayAlerts = True
Application.ScreenUpdating = True
Workbooks("Final_Format.xlsx").Activate
Sheets(1).Activate
End Sub
Function CheckWorkbookNames() As Boolean
Dim WB As Workbook
Dim IsOpen As Boolean
Dim Sheet1 As Boolean
Dim Sheet2 As Boolean
Dim ParaTOC As Boolean
IsOpen = False
For Each WB In Workbooks
If Not WB.Name <> "Data Formatter.xlsm" Then
IsOpen = True
Exit For
End If
Next WB
If IsOpen <> True Then
MyMsg = MsgBox("Check your Workbook Name", vbOKOnly)
CheckWorkbookNames = False
Exit Function
End If
If Workbooks("Data Formatter.xlsm").Sheets.Count <> 3 Then
MyMsg = MsgBox("Check Sheets, Sheets Names expected: Sheet1, Sheet2, Sheet3", vbOKOnly)
CheckWorkbookNames = False
Exit Function
End If
For i = 1 To Workbooks("Data Formatter.xlsm").Sheets.Count
With Workbooks("Data Formatter.xlsm")
If Not .Worksheets(i).Name <> "Sheet1" Then
Sheet1 = True
ElseIf Not .Worksheets(i).Name <> "Sheet2" Then
Sheet2 = True
ElseIf Not .Worksheets(i).Name <> "Sheet3" Then
ParaTOC = True
End If
End With
Next i
If Sheet1 <> True Or Sheet2 <> True Or ParaTOC <> True Then
MyMsg = MsgBox("Check Sheets, Sheets Names expected:" & vbCrLf & vbCrLf & "Sheet1, Sheet2, Sheet3", vbOKOnly)
CheckWorkbookNames = False
Exit Function
End If
CheckWorkbookNames = True
End Function
Function SetColumnsDataSheet(PartSheet)
With PartSheet.Activate
Range("A1") = "Item#"
Range("B1") = "ID#"
Range("C1") = "General Title"
Range("D1") = "General Section"
Range("E1") = "Paragraph#"
Range("G1") = "Paragraph Title"
Range("K1") = "Check These Rows"
Range("K1").Interior.ColorIndex = 6
End With
End Function
Function PopulateDataSheet(TitleSheet, DataOneSheet, DataTwoSheet, GeneralSheet)
Dim Bottom As Range
Dim BottomTwo As Range
Dim BottomThree As Range
Dim CurrentCell As Range
Dim BottomRow As Integer
Dim ErrorCount As Integer
Dim CellValue As String
Dim str As String
Dim ParagraphNum As String
Dim ParagraphCell As RegExp
Dim SectionNum As RegExp
Dim SectionNumLong As RegExp
Dim PageNum As RegExp
Dim PartTwo As RegExp
Dim PartThree As RegExp
Dim ReplacePart As Object
Dim MultipleParaNum As Object
Dim ExtactedValues As Object
Dim CheckCell As Boolean
' Create RegEx check values
Set ParagraphCell = New RegExp
Set SectionNum = New RegExp
Set SectionNumLong = New RegExp
Set PageNum = New RegExp
Set PartTwo = New RegExp
Set PartThree = New RegExp
Set ReplacePart = New RegExp
Set MultipleParaNum = New RegExp
ParagraphCell.Pattern = "^(\d{1,2}\.\d{1,2})"
SectionNum.Pattern = "SECTION (\d{2}\s\d{2}\s\d{2})$"
SectionNumLong.Pattern = "SECTION (\d{2}\s\d{2}\s\d{2}\.\d{2}\s\d{2})$"
PageNum.Pattern = "Page \d\s"
PartTwo.Pattern = "^[2]\.(\d{1,2})"
PartThree.Pattern = "^[3]\.(\d{1,2})"
ReplacePart.Pattern = "\s+DATA \d(\s*\w*)*$"
With MultipleParaNum
.Pattern = "((\d+\.{0,1}){2,8}\s+)(\w*\s*\(*\)*-*)+[$|(!2-3]?"
.Global = True
End With
' find the last cell with a value
Set Bottom = TitleSheet.Range("A65536").End(xlUp)
Set BottomTwo = DataOneSheet.Range("D65536").End(xlUp)
Set BottomThree = DataTwoSheet.Range("D65536").End(xlUp)
BottomRow = Bottom.Row
' left align all data
Range("A1:I" & BottomRow).IndentLevel = 0
With TitleSheet.Activate
i = 1
ErrorCount = 0
While i <= BottomRow
Set CurrentCell = Range("A" & i)
If ReplacePart.Test(CurrentCell) <> False Then
CurrentCell.Value2 = ReplacePart.Replace(CurrentCell.Text, "")
End If
If ParagraphCell.Test(CurrentCell) <> False Then
' check for values it other cells concatinate if there is a value
For J = 1 To 8
If CurrentCell.Offset(0, J) <> "" Then
CurrentCell = CurrentCell.Text & " " & CurrentCell.Offset(0, J).Text
End If
Next J
' Find multiple Paragraph numbers in cell, Highlight these cells
' Will need to manually seperate these values
If MultipleParaNum.Test(CurrentCell) <> False Then
J = 1
Set ExtactedValues = MultipleParaNum.Execute(CurrentCell)
If ExtactedValues.Count > 1 Then
CurrentCell.Interior.ColorIndex = 6
CurrentCell.Offset(1, 0).EntireRow.Insert
i = i + 1
CurrentCell.Offset(1, 0) = CurrentCell.Value2
Set CurrentCell = Range("A" & i)
' Set the row location for problem cell in the Data sheet
CheckCell = True
If Left(CurrentCell, Left(CurrentCell, 1)) = 2 Then
ErrorCount = DataOneSheet.Range("J65536").End(xlUp).Row + 1
ElseIf Left(CurrentCell, Left(CurrentCell, 1)) = 3 Then
ErrorCount = DataTwoSheet.Range("J65536").End(xlUp).Row + 1
End If
End If
End If
'get the Paragraph number and Paragraph title and populate
ParagraphNum = Left(CurrentCell.Text, InStr(CurrentCell.Text, " ") - 1)
ParagraphTitle = Right(CurrentCell.Text, Len(CurrentCell.Text) - Len(Left(CurrentCell.Text, InStr(CurrentCell.Text, " "))))
CurrentCell = ParagraphNum
CurrentCell.Offset(0, 1) = ParagraphTitle
' Populate Sheet Part2 and Part3 if it starts with 2 or 3
If PartTwo.Test(ParagraphNum) <> False Then
BottomTwo.Offset(1, 0) = SectionNumber
BottomTwo.Offset(1, -1) = FindSpecTitle(BottomTwo.Offset(1, 0))
BottomTwo.Offset(1, 1) = ParagraphNum
BottomTwo.Offset(1, 3) = ParagraphTitle
If CheckCell = True Then
DataOneSheet.Range("K" & ErrorCount).Interior.ColorIndex = 6
DataOneSheet.Range("K" & ErrorCount) = "Row: " & BottomTwo.Row + 1
End If
ElseIf PartThree.Test(ParagraphNum) <> False Then
BottomThree.Offset(1, 0) = SectionNumber
BottomThree.Offset(1, -1) = FindSpecTitle(BottomThree.Offset(1, 0))
BottomThree.Offset(1, 1) = ParagraphNum
BottomThree.Offset(1, 3) = ParagraphTitle
If CheckCell = True Then
DataTwoSheet.Range("K" & ErrorCount).Interior.ColorIndex = 6
DataTwoSheet.Range("K" & ErrorCount) = "Row: " & BottomThree.Row + 1
End If
End If
ElseIf SectionNum.Test(CurrentCell.Text) <> False Or SectionNumLong.Test(CurrentCell.Text) <> False Then
If PageNum.Test(CurrentCell.Text) <> True Then
' Seperate long paragraph numbers from short paragraph numbers
If SectionNum.Test(CurrentCell.Text) <> False Then
Set SectionText = SectionNum.Execute(CurrentCell.Text)
Else
Set SectionText = SectionNumLong.Execute(CurrentCell.Text)
End If
SectionNumber = SectionText.Item(0).submatches.Item(0)
' populate the section number in all sheets
CurrentCell = SectionNumber
Else
CurrentCell.EntireRow.Delete
i = i - 1
End If
Else
CurrentCell.EntireRow.Delete
i = i - 1
End If
i = i + 1
CheckCell = False
' Reset the bottom of the sheets
Set Bottom = TitleSheet.Range("A65536").End(xlUp)
BottomRow = Bottom.Row
Set BottomTwo = DataOneSheet.Range("D65536").End(xlUp)
Set BottomThree = DataTwoSheet.Range("D65536").End(xlUp)
Wend
End With
End Function
Function StyleDataSheet(PartSheet)
Dim Bottom As Range
Dim ParagraphClass As RegExp
Dim StartWithSpace As RegExp
Set ParagraphClass = New RegExp
Set StartWithSpace = New RegExp
ParagraphClass.Pattern = "\d\.\d{1,2}\."
StartWithSpace.Pattern = "^\s."
Set Bottom = PartSheet.Range("E65536").End(xlUp)
With PartSheet.Activate
i = 2
' Format the top row to stay at top of sheet when scrolled
Range("A1:I1").Font.Bold = True
Application.ScreenUpdating = True
With ActiveWindow
.SplitColumn = 0
.SplitRow = 1
.FreezePanes = True
End With
While i <= Bottom.Row
CurrentCell = Range("E" & i)
' Indent sub-Paragraphs
If ParagraphClass.Test(CurrentCell) <> False Then
Range("E" & i).IndentLevel = 1
End If
' remove proceeding spaces in paragraph titles
RemoveFirstChar("G", i)
' remove proceeding spaces in General Titles
RemoveFirstChar("C", i)
i = i + 1
Wend
' Set Format for all text, resize cells
With Range("A1:I" & Bottom.Row)
With .Font
.Name = "Courier New"
.Size = 10
End With
End With
PartSheet.Columns("A:I").AutoFit
End With
End Function
Private Function RemoveFirstChar(column, row)
While StartWithSpace.Test(Range(column & row)) <> False
Range(column & row) = Right(Range(column & row), Len(Range(column & row)) - 1)
Wend
End Function
Function PopulateGeneral(GeneralSheet)
Dim Bottom As Range
Dim BottomRow As Integer
Dim CellValue As String
Dim FirstCellFormat As RegExp
Dim NoAZCell As RegExp
Dim ConcatCell As RegExp
Dim ConcatCellDot As RegExp
' Create all regex check values
Set FirstCellFormat = New RegExp
Set NoAZCell = New RegExp
Set ConcatCell = New RegExp
Set ConcatCellDot = New RegExp
FirstCellFormat.Pattern = "^(\d{1,2}(\s|$))"
NoAZCell.Pattern = "^[^a-zA-Z]+$"
ConcatCell.Pattern = "^(\d{2}\s\d{2}\s\d{2}\s)"
ConcatCellDot.Pattern = "^(\d{2}\s\d{2}\s\d{2}\.\d{2}\s\d{2})"
' find the last cell with a value
Set Bottom = GeneralSheet.Range("A65536").End(xlUp)
BottomRow = Bottom.Row
With GeneralSheet.Activate
i = 1
While i < BottomRow
' remove non-SpecNumber rows
Cells(i, 1) = Cells(i, 1).Text
' Run checks on remaining cell for the format that they have been exported in
' Checks if there are no values a-z in the cell
If NoAZCell.Test(Cells(i, 1)) <> False Then
CellValue = Cells(i, 1).Text
For J = 2 To 4
If Cells(i, J) <> "" Then
CellValue = CellValue & " " & Cells(i, J).Text
End If
Next J
Range("A" & i).NumberFormat = "@"
Cells(i, 1).Value = CellValue
' check for cell overflow
If Cells(i, 6) <> "" Then
Cells(i, 5) = Cells(i, 5) & " " & Cells(i, 6)
End If
' at the moment there will be spaces before the Title in column five, if this is a standard of 3 it can be hard coded
' Check if title is in the same cell as the General number
ElseIf ConcatCell.Test(Cells(i, 1)) <> False Then
Cells(i, 5) = Right(Cells(i, 1), (Len(Cells(i, 1)) - 8))
Cells(i, 1) = Left(Cells(i, 1), 8)
' check for cell overflow
If Cells(i, 6) <> "" Then
Cells(i, 5) = Cells(i, 5) & " " & Cells(i, 6)
End If
' Check if title is in the same cell as the General number
ElseIf ConcatCellDot.Test(Cells(i, 1)) <> False Then
Cells(i, 5) = Right(Cells(i, 1), (Len(Cells(i, 1)) - 14))
Cells(i, 1) = Left(Cells(i, 1), 14)
' check for cell overflow
If Cells(i, 6) <> "" Then
Cells(i, 5) = Cells(i, 5) & " " & Cells(i, 6)
End If
End If
If FirstCellFormat.Test(Cells(i, 1)) <> True Then
' Check if it is an extended cell value from cell above
If Not Range("A" & i) <> "" And Not Range("B" & i) <> "" And Not Range("C" & i) <> "" And Not Range("D" & i) <> "" Then
Cells(i - 1, 5) = Cells(i - 1, 5) & " " & Cells(i, 5)
End If
Cells(i, 1).EntireRow.Delete
i = i - 1
BottomRow = BottomRow - 1
End If
i = i + 1
Wend
Range("B1:D1").EntireColumn.Delete
Range("C1").EntireColumn.Delete
End With
End Function
Function PopulateFinal(FinalSheet)
Dim Bottom As Range
Dim FindSection As RegExp
Dim FindSD As RegExp
Dim FindParagraph As RegExp
Dim FindPart As RegExp
Set FindSection = New RegExp
Set FindSD = New RegExp
Set FindParagraph = New RegExp
Set FindPart = New RegExp
FindSection.Pattern = "^(\d{2}\s){2}(\d{2})"
FindSD.Pattern = "^(GD-\d{2})"
FindParagraph.Pattern = "^(\d{1,2})\."
FindPart.Pattern = "^Data\s"
Set Bottom = FinalSheet.Range("d65536").End(xlUp)
With FinalSheet.Activate
' delete the first two rows
Range("A1:A2").EntireRow.Delete
' Delete column 1-2 and 7-18
Range("A1:B1").EntireColumn.Delete
Range("E1:P1").EntireColumn.Delete
For i = 1 To Range(Cells(1, 1), Cells(Bottom.Row, 1)).Count
' Change the cell values from a date format to a general format
If Cells(i, 1).NumberFormat <> "General" Then
Cells(i, 1) = Cells(i, 1).Text
End If
' Find and remove the Page Headers
If Cells(i, 1) <> "" And FindSection.Test(Cells(i, 1)) <> True Then
Range(Cells(i, 1), Cells(i + Cells(i, 1).MergeArea.Rows.Count - 1, 1)).EntireRow.Delete
i = i - Cells(i, 1).MergeArea.Rows.Count
' Check if the Cells should be merged based on the Paragraph being empty and No GD Value
ElseIf Not Cells(i, 3) <> "" And Not Cells(i, 4) <> "" And FindSD.Test(Cells(i, 2)) <> True Then
If Cells(i, 2) <> "" Then
Cells(i - 1, 2) = Cells(i - 1, 2) & " " & Cells(i, 2)
End If
Cells(i, 1).EntireRow.Delete
i = i - 1
' Check if the paragraph contains a paragraph number or some Data x and concatinate with cell above
ElseIf Cells(i, 3) <> "" And FindParagraph.Test(Cells(i, 3)) <> True And FindPart.Test(Cells(i, 3)) <> True Then
If Cells(i, 2) <> "" Then
Cells(i, 2) = Cells(i, 2) & Cells(i, 3)
Cells(i - 1, 2) = Cells(i - 1, 2) & " " & Cells(i, 2)
End If
Cells(i, 1).EntireRow.Delete
i = i - 1
End If
Set Bottom = FinalSheet.Range("b65536").End(xlUp)
If Not i < Range(Cells(1, 1), Cells(Bottom.Row, 1)).Count Then GoTo BottomSheet
Next i
End With
BottomSheet:
End Function
Function FinalColumnLabels(FinalSheet)
Dim Bottom As Range
Set Bottom = FinalSheet.Range("D65536").End(xlUp)
With FinalSheet.Activate
' Create Rows and columns for the categories
Range("A1").EntireColumn.Insert
Range("A1").EntireColumn.Insert
Range("A1").EntireRow.Insert
Range("D1").EntireColumn.Insert
Range("F1").EntireColumn.Insert
' Populate the new Sections
Range("B1") = "General Section"
Range("C1") = "Paragraph #"
Range("D1") = "General Title"
' Move General Title
Range("G:G").Cut
Range("D:D").Insert
Range("E1") = "Description"
Range("F1") = "GD#"
Range("G1") = "Classification"
End With
End Function
Function FinalDataInput(FinalSheet)
Dim Bottom As Range
Dim Lookup As Range
Dim NLookup As Range
Dim NextLookup As Range
Dim LookupValue As String
Dim FirstSD As Range
Dim FistSpec As Range
Set Bottom = FinalSheet.Range("F65536").End(xlUp)
Set FirstSD = FinalSheet.Range(Cells(1, 6), Cells(Bottom.Row, 6)).Find("GD*", LookIn:=xlValues)
With FinalSheet.Activate
' Populate the General Section
For i = 2 To Range(Cells(2, 3), Cells(Bottom.Row + 1, 3)).Count
If Not Cells(i, 3) <> "" Then
Cells(i, 3) = Cells(i - 1, 3)
End If
Next
' populate the GD number
Set Lookup = FirstSD
Set NextLookup = Range(Cells(1, 6), Cells(Bottom.Row, 6)).FindNext(Lookup)
Do
LookupValue = Left(Lookup, 5)
If Not NextLookup.Address <> FirstSD.Address Then
Range(Lookup.Address, Bottom.Address).Offset(0, 1) = LookupValue
Else
Range(Lookup.Address, NextLookup.Address).Offset(0, 1) = LookupValue
End If
If Lookup.Address <> FirstSD.Address Then
Range(Lookup.Address).EntireRow.Delete
End If
Set Lookup = NextLookup
Set NextLookup = Range(Cells(1, 6), Cells(Bottom.Row, 6)).FindNext(Lookup)
Loop While Lookup.Address <> FirstSD.Address
LookupValue = Left(Lookup, 5)
Range(FirstSD.Address).EntireRow.Delete
For i = 2 To Range(Cells(2, 1), Cells(Bottom.Row + 1, 1)).Count
' number the rows
Cells(i, 1) = (i - 1)
' input General Titles
Cells(i, 2) = FindSpecTitle(Cells(i, 3))
Next i
End With
End Function
Function FindSpecTitle(SectionNum)
Dim GeneralSheet As Worksheet
Dim FoundHere As Range
Set GeneralSheet = Workbooks("Data Formatter.xlsm").Worksheets("FormattedGeneral")
Set FoundHere = GeneralSheet.Range("A:A").Find(SectionNum, LookIn:=xlValues, LookAt:=xlWhole)
If (FoundHere Is Nothing) Then
FindSpecTitle = ""
Else
FindSpecTitle = FoundHere.Offset(0, 1).Value
End If
End Function
Function StyleFinal(Bottom)
Dim CurrentCell As String
Dim NoSpaceCell As String
Dim SpaceCount As Integer
Dim HasSpace As RegExp
Dim AllSpaces As Object
Set HasSpace = New RegExp
Set AllSpaces = New RegExp
HasSpace.Pattern = "^\s"
AllSpaces.Pattern = "\s+"
' Format Cells to final product
Range("A1:H1").Font.Bold = True
Application.ScreenUpdating = True
With ActiveWindow
.SplitColumn = 0
.SplitRow = 1
.FreezePanes = True
End With
Range("A1:" & Bottom.Offset(0, 6).Address).Borders(xlInsideHorizontal).LineStyle = xlNone
Range("A1:" & Bottom.Offset(0, 6).Address).Borders(xlInsideVertical).LineStyle = xlNone
Range("A2:" & Bottom.Offset(0, 6).Address).HorizontalAlignment = xlLeft
Range(Bottom.Offset(0, -3).Address & ":" & Bottom.Offset(100, 6).Address).Borders.LineStyle = xlNone
ActiveSheet.Columns("A:H").IndentLevel = 0
ActiveSheet.Columns("A:H").AutoFit
Range("A1").ColumnWidth = 10
Range("C1").ColumnWidth = 20
Range("D1").ColumnWidth = 15
Range("G1:H1").ColumnWidth = 10
With Range("A1:H" & Bottom.Row).Font
.Name = "Courier New"
.Size = 10
End With
For i = 2 To Bottom.Row
CurrentCell = Cells(i, 2)
' Remove leading spaces in General Titles
If HasSpace.Test(CurrentCell) <> False Then
' SpaceCount = 0
NoSpaceCell = Cells(i, 2)
While HasSpace.Test(NoSpaceCell) <> False
' SpaceCount = SpaceCount + 1
NoSpaceCell = Right(NoSpaceCell, Len(NoSpaceCell) - 1)
Wend
Cells(i, 2) = NoSpaceCell
End If
' Remove spaces in Classification
CurrentCell = Cells(i, 8)
If AllSpaces.Test(CurrentCell) <> False Then
NoSpaces = AllSpaces.Replace(CurrentCell, " ")
Cells(i, 8) = NoSpaces
End If
Next i
End Function
*** The Examples provided below do not represent the true data that was submitted and is only a small example
set of the data that the program is required to sift through and gather the desired data. ***
Sheet1
The data that was exported from a pdf file into excel. It is unorganized and contains alot of non-pertinent data.
Because of this there is a large function that breaks out the relevant data and deletes extra rows and columns.
Sheet2
The Sheet2 sheet contains the General Table Of Contents (TOC).
Once again this is data exported directly from a pdf file into excel. The image provides example of the majority of
random orientations the data was presented to my program. My program had to detect the entire section number, as
well as get the entire title of the section.
Sheet3
Finally there is the Sheet3 sheet. This sheet contains the paragraph numbers and title of the General "chapters".
This is the general format for each of the chapters. There is three parts Data 1,
Data 2, Data 3. Each of these parts can have 0 to 50 paragraphs each within any given General Section.
This quickly adds up and these sheets can have a couple thousand rows of data with weird outlier cases sprinkled
throughout.
The Final_Format Workbook is the product of these three sheets.
Final_Format
Sheet one is named Final_Format, it contains the General Data.
From Sheet 1 of the input we gather the General Section, General Title, GD#, and Classification. From Sheet 3 the Paragraph and Descripton are provided
Data 1 Sheet
The second Data 2 sheet provides the breakdown for all data gathered from Data 2 part of the section TOC. Here
the General Title and General Section are gathered from the input
sheet 2. The Paragraph# and Paragraph Title are from input
sheet 3.
Spare Data is formatted the same as Data 1 and 2, it has no data at this time and will be populated manually.
Data 2 Sheet
This is output sheet 4 which is much like sheet 2 only it gathers the Data 3 parts
of each section.
This program runs out to 685 lines of code so I will not be going into full detail. Instead I will jump into some of the more interesting implimentations used to organize the data.
First a quick overview of what the main function does. It starts with a little bit of error checking to be
sure all the sheets that need to be there are in fact there. Next some worksheets are defined for future use
I create a new sheet for the final product of the FormattedGeneral. This is for easier
access to the data I need for populating the other sheets.
The ParagraphTitles are then formatted the same operation is done for the
GeneralSheet. At the same time the data for Parts 2 and 3 are pulled and formatted into
their respective sheets.
Then the Data sheet is formatted, all excess rows and columns are removed. Populating the GD
numbers is part of the General Data.
The final few operations are to pull the sheets from the current formatter workbook into their own workbook.
The empty SpareDataSheet is created. The last thing to do is to clean up the original sheet,
and turn on the UI elements that were disabled.
I feel it is pertinent to mention the use of error checking in this program as it is one of my few implimentations, sadly. I have only recently began to impliment error checking as it was never neccissary.
All the obvious objects that are interacted with are checked for. It makes sure Data Formatter.xlsm is the current workbook.
That the Sheets.Count == 3
That each sheet has one of the exepected names Sheet1, Sheet2, Sheet3.
If all cases are valid the CheckWorkbookNames returns true. Otherwise an error is
displayed and CheckWorkbookNames is set to false and the function exits. This will set
NameCheck to equal false and cause the Sub to exit.
I utilized quite a bit of regex expressions through the data collection process. Most notibly in this function
These are used to find and to seperate the numbers for populating into the PartTwo
and PartThree sheets. There is also some data that is similar throughout the pages
that required for some exception checks. These are the PageNum,
ReplacePart, and MultipleParaNum patterns.
In the case of the MultipleParaNum there was an exception where multiple paragraph
numbers were put into the same cell. I was unable to programatically seperate these and so the
Check Cell column was created. Anytime one of these values was found the cell number
is noted and put into this column.
This example also shows a major fault that runs as a theme throughout the program. That being the excessive
length of the functions. This portion could very easily be broken out into its own function.
After all of these checks The function does what it says it does and populates the section number and
paragraph titles into the Data 1 and Data 2 sheets.
Following the data population the Data Sheets are styled. This means that the columns are given the correct width and font styles are applied.
There is some similar Regular expressions used here but the special cases where different so I had to accomodate for them
The FirstCellFormat will detect if the cell overflowed into the cell below and concatinate the
values.
NoAZCell is used because cells with letters have their own way of being handled.
ConcatCell and ConcatCellDot are basically different varieties of what the General
number could look like.
Each cell in the FinalSheet is tested for certain characteristics. Depending on these
different patterns the data will be modified.
FindSection is used to find cells that do not contain a section number and removes these cells.
FindSD is used to avoid modifying cells that contain a GD number.
The cells that contain the FindParagraph and FindPart patterns have data that
I will want to merge if there is overflow.
The most important peice of this function was the population of the GD number. This was a problem that took quite
a bit of time to solve.
I needed to populate the cells from the occurance of one GD to the occurance of the next GD number with the value
of the first GD number. The Problem arose when the loop reached the bottom of the sheet.
First of all the bottom is always moving because rows are being deleted, then how to detect that the next cell is
back at the top?
My solution was to use a Do While loop. This way I can detect if the Lookup
address is the same as the NextLookup after all operations have been completed.
Other than that it just gets the range between the current GD and the next GD and populates that range of cells.
The last function sets the cell styles, font styles, and removes some spaces that are carried over from the origonal pdf file.
This is one of the largest programs I have written. This is largely do to the many individual caviots that each of
the pdf exports brought in.
The end result was a well formatted couple of sheets that the estimator was able to use.